#   LibAut
#       outcome regressions: multiple
#       Table 1 column (model) 4, Table C5
#   Juraj Medzihorsky
#   2021-08-12

library(stargazer)
library(margins)
library(logistf)
library(arm)
library(parallel)
options(mc.cores=3)
options(stringsAsFactors=F)
source('helpers.R')
tstat <- function(x) coef(x)/se.coef(x)

#   sessionInfo()
##  R version 3.6.3 (2020-02-29)
##  Platform: x86_64-pc-linux-gnu (64-bit)
##  Running under: Ubuntu 16.04.7 LTS
##
##  Matrix products: default
##  BLAS/LAPACK: /opt/OpenBLAS/lib/libopenblas_haswellp-r0.3.9.so
##
##  locale:
##   [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
##   [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_GB.UTF-8       LC_NAME=C
##   [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
##  attached base packages:
##  [1] parallel  stats     graphics  grDevices utils     datasets  methods   base
##
##  other attached packages:
##  [1] arm_1.9-3       lme4_1.1-18-1   Matrix_1.3-4    MASS_7.3-54     logistf_1.24    margins_0.3.23  stargazer_5.2.
##  2
##  [8] nvimcom_0.9-28  colorout_1.2-0
##
##  loaded via a namespace (and not attached):
##   [1] Rcpp_1.0.6           nloptr_1.2.1         compiler_3.6.3       pillar_1.6.1         tools_3.6.3
##   [6] rpart_4.1-13         lifecycle_1.0.0      tibble_3.1.2         nlme_3.1-152         lattice_0.20-44
##  [11] mgcv_1.8-28          pkgconfig_2.0.3      rlang_0.4.11         DBI_1.0.0            coda_0.19-4
##  [16] dplyr_1.0.6          generics_0.1.0       vctrs_0.3.8          nnet_7.3-16          grid_3.6.3
##  [21] tidyselect_1.1.1     mice_3.3.0           glue_1.4.2           data.table_1.11.8    R6_2.5.0
##  [26] fansi_0.5.0          survival_2.44-1.1    mitml_0.3-6          prediction_0.3.6.1   minqa_1.2.4
##  [31] purrr_0.3.4          tidyr_1.1.3          magrittr_2.0.1       backports_1.2.1      operator.tools_1.6.3
##  [36] formula.tools_1.7.1  ellipsis_0.3.2       splines_3.6.3        abind_1.4-5          assertthat_0.2.1
##  [41] utf8_1.2.1           broom_0.7.3          pan_1.6              crayon_1.4.1         jomo_2.6-5

#------------------------------------------------------------------------------

code_dir <- getwd()
data_dir <- gsub('code$', 'data', code_dir)
tabs_dir <- gsub('code$', 'tables', code_dir)

setwd(data_dir)
dat <- readRDS('outcome_multiple_20210810.rds')
setwd(code_dir)

dat$reg_fac <- as.factor(dat$e_regionpol_6C)
dat <- subset(dat, !(dem_ep_id%in%'HRV_1992_2004'))

#------------------------------------------------------------------------------

#   formulas
fl0 <- fy0 <- epy ~ 1
linchunk1 <- ~ . + 
    reg_fac +
    lag1_e_migdppcln + 
    lag1_e_migdpgro + 
    lag1_logpop +
    lag1_v2pepwrsoc + 
    lag1_v2xeg_eqdr + 
    lag1_v2xnp_pres + 
    lag1_v2csprtcpt +
    lag1_v2x_polyarchy +
    lag1_excl_region_edi + 
    lag1_e_miinteco + 
    lag1_e_miinterc  
linchunk2 <- ~ . + 
    I((1e-1*(year-1990))) + I((1e-1*(year-1990))^2) + I((1e-1*(year-1990))^3) 
fl1 <- update(fy0, linchunk1)
fl2 <- update(fl1, linchunk2)

#   gaussian-linear GLM under MLE
o0 <- lm(fy0, data=dat)
o1 <- lm(fl1, data=dat)
o2 <- lm(fl2, data=dat)

#   bernoulli-probit GLM under MLE
p0 <- glm(fy0, data=dat, family=binomial('probit'))
p1 <- glm(fl1, data=dat, family=binomial('probit'))
p2 <- glm(fl2, data=dat, family=binomial('probit'))

#   bernoulli-logistic GLM under MLE
l0 <- glm(fy0, data=dat, family=binomial('logit'))
l1 <- glm(fl1, data=dat, family=binomial('logit'))
l2 <- glm(fl2, data=dat, family=binomial('logit'))

#   bernoulli-logistic GLM under FPL
mcont <- logistf.control(maxit=1e4, maxstep=1e4)
f0 <- logistf(fy0, data=dat, control=mcont)
f1 <- logistf(fl1, data=dat, control=mcont)
f2 <- logistf(fl2, data=dat, control=mcont)

#   bernoulli-logistic GLM under bayes
b0 <- bayesglm(fl0, data=dat, family=binomial('logit'))
b1 <- bayesglm(fl1, data=dat, family=binomial('logit'))
b2 <- bayesglm(fl2, data=dat, family=binomial('logit'))

#   round(cbind(coef(o1), coef(l1), coef(p1), coef(f1), coef(b1)), 1)
#   round(cbind(coef(o2), coef(l2), coef(p2), coef(f2), coef(b2)), 1)
#   round(cbind(tstat(o1), tstat(l1), tstat(p1)), 1)
#   round(cbind(tstat(o2), tstat(l2), tstat(p2)), 1)

#-------------------------------------------------------------------------------   
#   stargazer doesn't support logistf

setwd(tabs_dir)

#   this is good only as a blueprint for the tex formatting
regtab <- 
    stargazer(o1, o2, l1, l2, p1, p2,
              digits=2, report='vcs')
writeLines(regtab, con='outcome_regressions_multiple_1.tex') 

#-------------------------------------------------------------------------------   
#   add logistf by hand

bb <- 
    data.frame(o1=c(round(coef(o1),2), rep(NA,3)),
               o2=c(round(coef(o2),2)),
               l1=c(round(coef(l1),2), rep(NA,3)),
               l2=c(round(coef(l2),2)),
               f1=c(round(coef(f1),2), rep(NA,3)),
               f2=c(round(coef(f2),2)))

bt <- paste(apply(bb, 1, paste, collapse=' & '), '\\')

ci <-
    data.frame(
        o1=c(paste0('(', round(confint(o1),2)[,1], ', ', round(confint(o1),2)[,2], ')'), rep('',3)),
        o2=  paste0('(', round(confint(o2),2)[,1], ', ', round(confint(o2),2)[,2], ')'),
        l1=c(paste0('(', round(confint(l1),2)[,1], ', ', round(confint(l1),2)[,2], ')'), rep('',3)),
        l2=  paste0('(', round(confint(l2),2)[,1], ', ', round(confint(l2),2)[,2], ')'),
        f1=c(paste0('(', round(confint(f1),2)[,1], ', ', round(confint(f1),2)[,2], ')'), rep('',3)),
        f2=  paste0('(', round(confint(f2),2)[,1], ', ', round(confint(f2),2)[,2], ')'))

ct <- paste(apply(ci, 1, paste, collapse=' & '), '\\')

bt <- gsub('-', '$-$', bt)
ct <- gsub('-', '$-$', ct)
bt <- gsub('NA', '', bt)
ct <- gsub('NA', '', ct)

tab <- character(length=4*length(bt))
for (i in 1:length(bt)) {
    ii <- (i-1)*3 + i
    tab[ii] <- rownames(bb)[i]
    tab[ii+1] <- paste0(' & ', bt[i])
    tab[ii+2] <- paste0(' & ', ct[i])
}

#   This is:
#       - Last column in Table 1
#       - most of Table C.5 
writeLines(tab, con='Table_C5.tex')


#   Additional info into Table C.5
round(c(AIC(o1), AIC(o2), AIC(l1), AIC(l2), f1$aic, f2$aic), 2)
# 220.25 207.65 214.51 196.92

auc <- 
    c(getROC_AUC(fitted(o1), dat$epy)$auc,
      getROC_AUC(fitted(o2), dat$epy)$auc,
      getROC_AUC(fitted(l1), dat$epy)$auc,
      getROC_AUC(fitted(l2), dat$epy)$auc,
      getROC_AUC(f1$predict, dat$epy)$auc,
      getROC_AUC(f2$predict, dat$epy)$auc)

#   Additional info into Table C.5 
round(auc, 2)
# 0.89 0.92 0.90 0.92 0.90 0.92

#   LPO AUC and LOO CC are in different scripts

#   #   #   not reported
#   round(summary(o1)$adj, 2)
#   round(summary(o2)$adj, 2)
#   #   0.41, 0.45

#   SCRIPT END